This Rmd includes code for creating “weightings” for the visits data, and starting to compare with case data, for Alameda County.
library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)
library(esri2sf)
library(readr)
library(arsenal)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
# Load the processing functions. This also loads the normalization functions.
source("safegraph_process_patterns_functions.R")
sg_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/'
sg_hourly_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/weekly-patterns/v2/hourly/'
Load the hourly visits data.
# # load places and weights
# bay_hourly_visits_places_309_322 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_3-09_3-22.rds"))
# bay_hourly_visits_places_323_405 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_3-23_4-05.rds"))
# bay_hourly_visits_places_406_412 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_4-06_4-12.rds"))
# bay_hourly_visits_places_413_426 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_4-13_4-26.rds"))
# bay_hourly_visits_places_427_510 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_4-27_5-10.rds"))
# bay_hourly_visits_places_511_524 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_5-11_5-24.rds"))
# bay_hourly_visits_places_525_531 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_5-25_5-31.rds"))
# bay_hourly_visits_places_601_607 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_6-01_6-07.rds"))
# bay_hourly_visits_places_608_621 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_6-08_6-21.rds"))
#
# # load visits by origin
# ac_hourly_visits_zip_309_322 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_3-09_3-22.rds"))
# ac_hourly_visits_zip_323_405 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_3-23_4-05.rds"))
# ac_hourly_visits_zip_406_412 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_4-06_4-12.rds"))
# ac_hourly_visits_zip_413_426 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_4-13_4-26.rds"))
# ac_hourly_visits_zip_427_510 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_4-27_5-10.rds"))
# ac_hourly_visits_zip_511_524 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_5-11_5-24.rds"))
# ac_hourly_visits_zip_525_531 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_5-25_5-31.rds"))
# ac_hourly_visits_zip_601_607 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_6-01_6-07.rds"))
# ac_hourly_visits_zip_608_621 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_6-08_6-21.rds"))
Process to get weighted visits.
# # input the visits from AC and the POI visits data for the whole Bay for the same date range, and the population by zip code
# getWeightedVisits <- function(ac_hourly_visits_zip, bay_hourly_visits_places) {
# ac_hourly_visits_zip_weights <- left_join(ac_hourly_visits_zip, bay_hourly_visits_places %>% dplyr::select(safegraph_place_id, hour, date, total_visits_hourly_avg, area_square_feet, hourly_visits_per_area, weekly_median_dwell) %>% rename(place_visits_per_area = hourly_visits_per_area, place_total_visits_avg = total_visits_hourly_avg)) %>%
# mutate(weighted_visits = total_visits_avg * place_visits_per_area,
# visit_hours = total_visits_avg * weekly_median_dwell/60, # dwell is in minutes, convert to hours
# visits_per_area = total_visits_avg / area_square_feet,
# weighted_visit_hours = visit_hours * place_visits_per_area,
# weighted_visit_hours_v2 = visit_hours * place_visits_per_area * weekly_median_dwell/60 # multiply the place visits by dwell time to also convert them to visit hours, also converting to hours again
# )
# return(ac_hourly_visits_zip_weights)
# }
#
# # run this for all the visits data
# ac_hourly_visits_zip_weights_309_322 <- getWeightedVisits(ac_hourly_visits_zip_309_322, bay_hourly_visits_places_309_322)
# ac_hourly_visits_zip_weights_323_405 <- getWeightedVisits(ac_hourly_visits_zip_323_405, bay_hourly_visits_places_323_405)
# ac_hourly_visits_zip_weights_406_412 <- getWeightedVisits(ac_hourly_visits_zip_406_412, bay_hourly_visits_places_406_412)
# ac_hourly_visits_zip_weights_413_426 <- getWeightedVisits(ac_hourly_visits_zip_413_426, bay_hourly_visits_places_413_426)
# ac_hourly_visits_zip_weights_427_510 <- getWeightedVisits(ac_hourly_visits_zip_427_510, bay_hourly_visits_places_427_510)
# ac_hourly_visits_zip_weights_511_524 <- getWeightedVisits(ac_hourly_visits_zip_511_524, bay_hourly_visits_places_511_524)
# ac_hourly_visits_zip_weights_525_531 <- getWeightedVisits(ac_hourly_visits_zip_525_531, bay_hourly_visits_places_525_531)
# ac_hourly_visits_zip_weights_601_607 <- getWeightedVisits(ac_hourly_visits_zip_601_607, bay_hourly_visits_places_601_607)
# ac_hourly_visits_zip_weights_608_621 <- getWeightedVisits(ac_hourly_visits_zip_608_621, bay_hourly_visits_places_608_621)
#
# # summarize for each day and create two data frames
# summarizeWeightedVisits <- function(ac_hourly_visits_zip_weights) {
# ac_daily_weighted_visits <- ac_hourly_visits_zip_weights %>%
# mutate(weighted_visits = ifelse(is.na(area_square_feet), 0, weighted_visits),
# weighted_visit_hours = ifelse(is.na(area_square_feet), 0, weighted_visit_hours),
# weighted_visit_hours_v2 = ifelse(is.na(area_square_feet), 0, weighted_visit_hours_v2),
# visits_per_area = ifelse(is.na(area_square_feet), 0, visits_per_area)) %>% # handle any values that have NAs for the area of the POI, because this introduces NAs in the weighted values and visits per area. this doesn't happen often, so shouldn't affect overall results, but I do this to prevent the NAs from causing problems later
# group_by(zipcode, date) %>%
# summarize(total_weighted_visits = sum(weighted_visits),
# total_visit_hours = sum(visit_hours),
# total_visits_per_area = sum(visits_per_area),
# total_weighted_visit_hours = sum(weighted_visit_hours),
# total_visits = sum(total_visits_avg),
# total_weighted_visit_hours_v2 = sum(weighted_visit_hours_v2))
#
# return(ac_daily_weighted_visits)
# }
#
# ac_weighted_visits_zip_1 <- rbind(summarizeWeightedVisits(ac_hourly_visits_zip_weights_309_322), summarizeWeightedVisits(ac_hourly_visits_zip_weights_323_405), summarizeWeightedVisits(ac_hourly_visits_zip_weights_406_412), summarizeWeightedVisits(ac_hourly_visits_zip_weights_413_426), summarizeWeightedVisits(ac_hourly_visits_zip_weights_427_510), summarizeWeightedVisits(ac_hourly_visits_zip_weights_511_524)) %>%
# filter(!is.na(zipcode))
#
# ac_weighted_visits_zip_2 <- rbind(summarizeWeightedVisits(ac_hourly_visits_zip_weights_525_531), summarizeWeightedVisits(ac_hourly_visits_zip_weights_601_607), summarizeWeightedVisits(ac_hourly_visits_zip_weights_608_621)) %>%
# filter(!is.na(zipcode))
#
# ac_weighted_visits_zip <- rbind(ac_weighted_visits_zip_1, ac_weighted_visits_zip_2)
#
# # save the RDS files
# saveRDS(ac_weighted_visits_zip_1, paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_weighted_03-09-20_05-24-20.rds"))
# saveRDS(ac_weighted_visits_zip_2, paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_weighted_05-25-20_06-21-20.rds"))
# load the visits data, weighted
ac_weighted_visits_zip_1 <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_weighted_03-09-20_05-24-20.rds"))
ac_weighted_visits_zip_2 <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_weighted_05-25-20_06-21-20.rds"))
ac_weighted_visits_zip <- rbind(ac_weighted_visits_zip_1, ac_weighted_visits_zip_2)
# load the visits data, unweighted
ac_daily_visits_zip_1 <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_03-09-20_05-24-20.rds"))
ac_daily_visits_zip_2 <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_05-25-20_06-21-20.rds"))
ac_daily_visits_zip <- rbind(ac_daily_visits_zip_1, ac_daily_visits_zip_2)
# compare to make sure that the weighted visits data "total visits" column is the same as the unweighted visits
summary(comparedf(ac_daily_visits_zip %>% dplyr::select(date, zipcode, total_visits_avg) %>% rename(total_visits = total_visits_avg) %>% mutate(total_visits = round(total_visits, 3)) %>% arrange(zipcode), ac_weighted_visits_zip %>% dplyr::select(date, zipcode, total_visits) %>% mutate(total_visits = round(total_visits, 3)) %>% arrange(zipcode)))
##
##
## Table: Summary of data.frames
##
## version arg ncol nrow
## -------- -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ----- -----
## x ac_daily_visits_zip %>% dplyr::select(date, zipcode, total_visits_avg) %>% rename(total_visits = total_visits_avg) %>% mutate(total_visits = round(total_visits, 3)) %>% arrange(zipcode) 3 5145
## y ac_weighted_visits_zip %>% dplyr::select(date, zipcode, total_visits) %>% mutate(total_visits = round(total_visits, 3)) %>% arrange(zipcode) 3 5145
##
##
##
## Table: Summary of overall comparison
##
## statistic value
## ------------------------------------------------------------ ------
## Number of by-variables 0
## Number of non-by variables in common 3
## Number of variables compared 3
## Number of variables in x but not y 0
## Number of variables in y but not x 0
## Number of variables compared with some values unequal 0
## Number of variables compared with all values equal 3
## Number of observations in common 5145
## Number of observations in x but not y 0
## Number of observations in y but not x 0
## Number of observations with some compared variables unequal 0
## Number of observations with all compared variables equal 5145
## Number of values unequal 0
##
##
##
## Table: Variables not shared
##
## | |
## |:-----------------------|
## |No variables not shared |
##
##
##
## Table: Other variables not compared
##
## | |
## |:-------------------------------|
## |No other variables not compared |
##
##
##
## Table: Observations not shared
##
## | |
## |:--------------------------|
## |No observations not shared |
##
##
##
## Table: Differences detected by variable
##
## var.x var.y n NAs
## ------------- ------------- --- ----
## date date 0 0
## zipcode zipcode 0 0
## total_visits total_visits 0 0
##
##
##
## Table: Differences detected
##
## | |
## |:-----------------------|
## |No differences detected |
##
##
##
## Table: Non-identical attributes
##
## | |
## |:---------------------------|
## |No non-identical attributes |
The output above checks that the “total visits” column in the weighted visits data is the same as the unweighted visits data, which it is, so that is good.
Comparing the weighted and unweighted visits in their predictive ability for cases.
# load the case data
ac_place_cases <- read.csv("Simone_Speizer/Alameda_County_Cumulative_Cases_By_Place_And_Zip.csv")
# handle the ones that are reported as <10 by calling them 5 cases. Note this is a simplification/choice I made and could be changed.
ac_cases_zip <- ac_place_cases %>%
rename(date = DtCreate) %>%
mutate(date = date %>% substr(1,10) %>% as.Date()) %>%
dplyr::select(c(date, contains("F9"))) %>% # only select zip code data
gather(key = "zipcode", value = "cases", -date) %>%
mutate(cases = ifelse(cases == "<10", "5", cases),
zipcode = zipcode %>% substr(2,6)) %>% # replace cases <10 with 10 and remove the "F" from zipcode names
mutate(cases = as.numeric(cases)) %>%
arrange(date)
# get Alameda County populations by zip code
# census data
acs_vars = readRDS("Simone_Speizer/censusData2018_acs_acs5.rds")
# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
regionString <- paste0("state:06+county:", county)
censusData <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = regionString,
vars = variableToPull
) %>%
mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
return(censusData)
}
# load block groups and their correspondence to ZCTAs
ac_bg_zctas <- readRDS("Simone_Speizer/ac_bg_ztcas.rds")
ac_bgs <- ac_bg_zctas %>% pull(blockgroup)
# get population data
ac_fips <- fips("CA", "Alameda") %>% substr(3,5)
ac_pop_zip <- pullCensus("B01003_001E", ac_fips) %>%
left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_pop_zip = sum(B01003_001E))
# join with cases
ac_cases_zip <- ac_cases_zip %>% left_join(ac_pop_zip) %>%
mutate(cases_by_pop = cases / total_pop_zip)
# get visits per capita
ac_weighted_visits_zip <- ac_weighted_visits_zip %>% left_join(ac_pop_zip) %>%
mutate(weighted_visits_per_capita = total_weighted_visits / total_pop_zip,
weighted_visit_hours_per_capita = total_weighted_visit_hours / total_pop_zip,
weighted_visit_hours_per_capita_v2 = total_weighted_visit_hours_v2 / total_pop_zip,
visits_per_capita = total_visits / total_pop_zip,
visit_hours_per_capita = total_visit_hours / total_pop_zip,
visits_per_area_per_capita = total_visits_per_area / total_pop_zip)
Compare these metrics.
Weighted visits = weighted by the total number of visits per hour and the place area
Visit-hours = weighted by the median dwell time for each POI
Weighted visit-hours = visit hours also weighted by the total number of visits per hour and the place area
Here I try using change in cases from 4/15 through 6/23 and visits from 4/1 through 6/9.
case_start <- as.Date("2020-04-15")
case_end <- as.Date("2020-06-23")
visits_start <- as.Date("2020-04-01")
visits_end <- as.Date("2020-06-09")
ac_init_cases <- ac_cases_zip %>%
filter(date == case_start) %>%
dplyr::select(zipcode, cases_by_pop) %>%
rename(init_cases_by_pop = cases_by_pop) %>%
mutate(log_init_cases_by_pop = log(init_cases_by_pop))
# max cases (end date)
ac_final_cases <- ac_cases_zip %>%
filter(date == case_end) %>%
dplyr::select(zipcode, cases_by_pop, total_pop_zip) %>%
rename(fin_cases_by_pop = cases_by_pop) %>%
mutate(log_fin_cases_by_pop = log(fin_cases_by_pop))
# summarize visits add current and initial cases
ac_visits_cases_change <- ac_weighted_visits_zip %>%
filter(date >= visits_start & date <= visits_end) %>%
filter(!is.na(zipcode) & !is.na(total_visits)) %>%
group_by(zipcode) %>%
summarize(total_visits_per_capita = sum(visits_per_capita),
total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
total_visit_hours_per_capita = sum(visit_hours_per_capita),
total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
total_visits = sum(total_visits)) %>%
left_join(ac_final_cases) %>%
left_join(ac_init_cases) %>%
filter(init_cases_by_pop != 0) %>%
filter(!is.na(fin_cases_by_pop)) %>%
mutate(change_log_cases_by_pop = log_fin_cases_by_pop - log_init_cases_by_pop,
change_cases_by_pop = fin_cases_by_pop - init_cases_by_pop)
# get linear model values
ac_visits_cases_change_log_model <- lm(change_log_cases_by_pop ~ total_visits_per_capita, ac_visits_cases_change)
ac_visits_cases_change_model <- lm(change_cases_by_pop ~ total_visits_per_capita, ac_visits_cases_change)
ac_visits_cases_change_log_model_hours <- lm(change_log_cases_by_pop ~ total_visit_hours_per_capita, ac_visits_cases_change)
ac_visits_cases_change_model_hours <- lm(change_cases_by_pop ~ total_visit_hours_per_capita, ac_visits_cases_change)
ac_wt_visits_cases_change_log_model <- lm(change_log_cases_by_pop ~ total_weighted_visits_per_capita, ac_visits_cases_change)
ac_wt_visits_cases_change_model <- lm(change_cases_by_pop ~ total_weighted_visits_per_capita, ac_visits_cases_change)
ac_wt_visits_cases_change_log_model_hours <- lm(change_log_cases_by_pop ~ total_weighted_visit_hours_per_capita, ac_visits_cases_change)
ac_wt_visits_cases_change_model_hours <- lm(change_cases_by_pop ~ total_weighted_visit_hours_per_capita, ac_visits_cases_change)
ac_visits_cases_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County (only non-zero starting cases)")
summary(ac_visits_cases_change_model)
##
## Call:
## lm(formula = change_cases_by_pop ~ total_visits_per_capita, data = ac_visits_cases_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0023390 -0.0014143 -0.0004434 0.0003204 0.0092913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.715e-03 1.523e-03 -1.126 0.2662
## total_visits_per_capita 9.096e-05 3.595e-05 2.530 0.0151 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002311 on 44 degrees of freedom
## Multiple R-squared: 0.127, Adjusted R-squared: 0.1072
## F-statistic: 6.402 on 1 and 44 DF, p-value: 0.01506
ac_visits_cases_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County (only non-zero starting cases)")
summary(ac_wt_visits_cases_change_model)
##
## Call:
## lm(formula = change_cases_by_pop ~ total_weighted_visits_per_capita,
## data = ac_visits_cases_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0034666 -0.0012542 -0.0000626 0.0003395 0.0075941
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.003798 0.001766 -2.151 0.0370 *
## total_weighted_visits_per_capita 0.020909 0.006215 3.364 0.0016 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002206 on 44 degrees of freedom
## Multiple R-squared: 0.2046, Adjusted R-squared: 0.1865
## F-statistic: 11.32 on 1 and 44 DF, p-value: 0.001599
ac_visits_cases_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visits_cases_change_model_hours), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model_hours)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County (only non-zero starting cases)")
summary(ac_visits_cases_change_model_hours)
##
## Call:
## lm(formula = change_cases_by_pop ~ total_visit_hours_per_capita,
## data = ac_visits_cases_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0024539 -0.0011830 -0.0006157 0.0003578 0.0086581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.948e-03 2.444e-03 -2.434 0.01908 *
## total_visit_hours_per_capita 1.979e-04 6.001e-05 3.298 0.00193 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002215 on 44 degrees of freedom
## Multiple R-squared: 0.1982, Adjusted R-squared: 0.18
## F-statistic: 10.88 on 1 and 44 DF, p-value: 0.001932
ac_visits_cases_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visits_cases_change_model_hours), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model_hours)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County (only non-zero starting cases)")
summary(ac_wt_visits_cases_change_model_hours)
##
## Call:
## lm(formula = change_cases_by_pop ~ total_weighted_visit_hours_per_capita,
## data = ac_visits_cases_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0022628 -0.0014172 -0.0007237 0.0002259 0.0095945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0018705 0.0008607 2.173 0.0352 *
## total_weighted_visit_hours_per_capita 0.0004724 0.0021608 0.219 0.8280
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002472 on 44 degrees of freedom
## Multiple R-squared: 0.001085, Adjusted R-squared: -0.02162
## F-statistic: 0.04779 on 1 and 44 DF, p-value: 0.828
That’s really interesting–either weighted visits or visit-hours does better, but weighted visit hours does not. Weird. Try with change in log of cases.
# plot log
ac_visits_cases_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(lm(change_log_cases_by_pop ~ total_visits_per_capita, ac_visits_cases_change)), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County (only non-zero starting cases)")
summary(ac_visits_cases_change_log_model)
##
## Call:
## lm(formula = change_log_cases_by_pop ~ total_visits_per_capita,
## data = ac_visits_cases_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9301 -0.3215 -0.0451 0.3504 1.6460
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.281969 0.372242 0.757 0.4528
## total_visits_per_capita 0.023079 0.008786 2.627 0.0118 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5648 on 44 degrees of freedom
## Multiple R-squared: 0.1356, Adjusted R-squared: 0.1159
## F-statistic: 6.9 on 1 and 44 DF, p-value: 0.01181
ac_visits_cases_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County (only non-zero starting cases)")
summary(ac_wt_visits_cases_change_log_model)
##
## Call:
## lm(formula = change_log_cases_by_pop ~ total_weighted_visits_per_capita,
## data = ac_visits_cases_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.51894 -0.29000 0.02007 0.34812 1.32806
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07204 0.45236 0.159 0.8742
## total_weighted_visits_per_capita 4.16422 1.59204 2.616 0.0122 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5651 on 44 degrees of freedom
## Multiple R-squared: 0.1346, Adjusted R-squared: 0.1149
## F-statistic: 6.842 on 1 and 44 DF, p-value: 0.01215
ac_visits_cases_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visits_cases_change_log_model_hours), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model_hours)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County (only non-zero starting cases)")
summary(ac_visits_cases_change_log_model_hours)
##
## Call:
## lm(formula = change_log_cases_by_pop ~ total_visit_hours_per_capita,
## data = ac_visits_cases_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.34918 -0.37864 0.03592 0.29945 1.46259
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.97383 0.58011 -1.679 0.100298
## total_visit_hours_per_capita 0.05472 0.01424 3.842 0.000388 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5256 on 44 degrees of freedom
## Multiple R-squared: 0.2512, Adjusted R-squared: 0.2342
## F-statistic: 14.76 on 1 and 44 DF, p-value: 0.000388
ac_visits_cases_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visits_cases_change_log_model_hours), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model_hours)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County (only non-zero starting cases)")
summary(ac_wt_visits_cases_change_log_model_hours)
##
## Call:
## lm(formula = change_log_cases_by_pop ~ total_weighted_visit_hours_per_capita,
## data = ac_visits_cases_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3357 -0.3702 0.0484 0.3381 1.7101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1577 0.2111 5.484 1.92e-06 ***
## total_weighted_visit_hours_per_capita 0.2143 0.5300 0.404 0.688
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6063 on 44 degrees of freedom
## Multiple R-squared: 0.003704, Adjusted R-squared: -0.01894
## F-statistic: 0.1636 on 1 and 44 DF, p-value: 0.6879
Again, really interesting.
Trying with 1 week later, 16 day lag, 0.0004 baseline per person
case_weeks_past <- 1
visits_lag <- 16
baseline_cases_by_pop <- 0.0004
min_baseline_cases <- 10
ac_cases_cutoff <- ac_cases_zip %>%
filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
filter(cases_by_pop >= baseline_cases_by_pop) %>%
mutate(log_cases_by_pop = log(cases_by_pop))
zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
# get the change in cases and total visit hours in the time period of interest
later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visit_hours_per_capita_v2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
for (i in 1:length(zipcodes_in_cutoff)) {
curr_zip <- zipcodes_in_cutoff[i]
curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
# first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well
change_cases_curr = NA
change_log_cases_curr = NA
total_visits_curr = NA
total_visits_per_capita_curr = NA
total_visit_hours_per_capita_curr = NA
total_weighted_visit_hours_per_capita_curr = NA
total_weighted_visits_per_capita_curr = NA
total_weighted_visit_hours_per_capita_v2_curr = NA
if ((max(curr_vals$date) >= curr_vals$date[1] + case_weeks_past*7) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + case_weeks_past*7 - visits_lag) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - visits_lag)) {
# change in cases an amount of time later
change_cases_curr = curr_vals$cases_by_pop[case_weeks_past*7 + 1] - curr_vals$cases_by_pop[1]
change_log_cases_curr = curr_vals$log_cases_by_pop[case_weeks_past*7 + 1] - curr_vals$log_cases_by_pop[1]
# get the total visits in the time range of interest
visits_curr = ac_weighted_visits_zip %>%
filter(zipcode == curr_zip) %>%
filter(date >= (curr_vals$date[1] - visits_lag) & date < (curr_vals$date[case_weeks_past*7 + 1] - visits_lag)) %>%
summarize(total_visits_per_capita = sum(visits_per_capita),
total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
total_visit_hours_per_capita = sum(visit_hours_per_capita),
total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
total_visits = sum(total_visits))
total_visits_curr <- visits_curr$total_visits
total_visits_per_capita_curr = visits_curr$total_visits_per_capita
total_visit_hours_per_capita_curr = visits_curr$total_visit_hours_per_capita
total_weighted_visit_hours_per_capita_curr = visits_curr$total_weighted_visit_hours_per_capita
total_weighted_visits_per_capita_curr = visits_curr$total_weighted_visits_per_capita
total_weighted_visit_hours_per_capita_v2_curr = visits_curr$total_weighted_visit_hours_per_capita_v2
}
later_cases_change[i] = change_cases_curr
later_log_cases_change[i] = change_log_cases_curr
total_visits[i] = total_visits_curr
total_visits_per_capita[i] = total_visits_per_capita_curr
total_visit_hours_per_capita[i] = total_visit_hours_per_capita_curr
total_weighted_visits_per_capita[i] = total_weighted_visits_per_capita_curr
total_weighted_visit_hours_per_capita[i] = total_weighted_visit_hours_per_capita_curr
total_weighted_visit_hours_per_capita_v2[i] = total_weighted_visit_hours_per_capita_v2_curr
}
# combine
ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, total_visits_per_capita, total_visit_hours_per_capita, total_weighted_visits_per_capita, total_weighted_visit_hours_per_capita, total_weighted_visit_hours_per_capita_v2, stringsAsFactors = F) %>%
filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
rename(zipcode = zipcodes_in_cutoff) %>%
left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip))
# get linear model values
ac_visits_cases_change_model <- lm(later_cases_change ~ total_visits_per_capita, ac_data_change)
ac_wt_visits_cases_change_model <- lm(later_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
ac_visit_hours_cases_change_model <- lm(later_cases_change ~ total_visit_hours_per_capita, ac_data_change)
ac_wt_visit_hours_cases_change_model <- lm(later_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
ac_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_visits_per_capita, ac_data_change)
ac_wt_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
ac_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_visit_hours_per_capita, ac_data_change)
ac_wt_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.490e-04 -1.091e-04 -8.720e-06 1.013e-04 3.475e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.153e-04 1.410e-04 -2.237 0.03107 *
## total_visits_per_capita 1.097e-04 3.144e-05 3.491 0.00121 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0001452 on 39 degrees of freedom
## Multiple R-squared: 0.238, Adjusted R-squared: 0.2185
## F-statistic: 12.18 on 1 and 39 DF, p-value: 0.001213
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_wt_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_weighted_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.927e-04 -1.227e-04 -2.658e-05 3.731e-05 4.510e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.010e-04 8.571e-05 1.179 0.246
## total_weighted_visits_per_capita 2.461e-03 2.904e-03 0.847 0.402
##
## Residual standard error: 0.0001649 on 39 degrees of freedom
## Multiple R-squared: 0.01808, Adjusted R-squared: -0.007094
## F-statistic: 0.7182 on 1 and 39 DF, p-value: 0.4019
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_visit_hours_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.770e-04 -1.057e-04 -2.684e-05 7.257e-05 3.801e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.803e-05 9.840e-05 -0.996 0.32530
## total_visit_hours_per_capita 5.331e-05 1.898e-05 2.809 0.00772 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0001517 on 39 degrees of freedom
## Multiple R-squared: 0.1683, Adjusted R-squared: 0.147
## F-statistic: 7.893 on 1 and 39 DF, p-value: 0.007718
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_wt_visit_hours_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_weighted_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.690e-04 -1.318e-04 -3.093e-05 2.289e-05 4.448e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.649e-04 3.774e-05 4.369 8.95e-05 ***
## total_weighted_visit_hours_per_capita 1.130e-04 5.720e-04 0.198 0.844
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0001663 on 39 degrees of freedom
## Multiple R-squared: 0.001, Adjusted R-squared: -0.02462
## F-statistic: 0.03904 on 1 and 39 DF, p-value: 0.8444
That’s super weird. It doesn’t appear to be as good to weight visits with this metric. Try with change in log of cases.
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visits_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40639 -0.15711 0.01299 0.12542 0.45660
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.54797 0.21112 -2.596 0.013243 *
## total_visits_per_capita 0.18899 0.04708 4.014 0.000263 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2175 on 39 degrees of freedom
## Multiple R-squared: 0.2924, Adjusted R-squared: 0.2742
## F-statistic: 16.11 on 1 and 39 DF, p-value: 0.0002626
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_wt_visits_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32882 -0.18310 -0.05612 0.11291 0.63530
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1634 0.1328 1.231 0.226
## total_weighted_visits_per_capita 4.4415 4.4981 0.987 0.330
##
## Residual standard error: 0.2554 on 39 degrees of freedom
## Multiple R-squared: 0.02439, Adjusted R-squared: -0.0006261
## F-statistic: 0.975 on 1 and 39 DF, p-value: 0.3295
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visit_hours_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44799 -0.16073 -0.00480 0.06846 0.54282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.16887 0.14975 -1.128 0.26636
## total_visit_hours_per_capita 0.09086 0.02888 3.146 0.00316 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2309 on 39 degrees of freedom
## Multiple R-squared: 0.2024, Adjusted R-squared: 0.182
## F-statistic: 9.899 on 1 and 39 DF, p-value: 0.003163
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_wt_visit_hours_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28630 -0.20207 -0.06042 0.09037 0.62444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.27962 0.05865 4.768 2.6e-05 ***
## total_weighted_visit_hours_per_capita 0.18381 0.88883 0.207 0.837
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2584 on 39 degrees of freedom
## Multiple R-squared: 0.001095, Adjusted R-squared: -0.02452
## F-statistic: 0.04277 on 1 and 39 DF, p-value: 0.8372
That’s very odd, opposite of what we would expect. Maybe it’s something about having a very short time frame to look at cases in, and how the weighting by area makes the “weighted visits” number be very low? What if we look farther out in time, like 4 weeks past?
case_weeks_past <- 4
visits_lag <- 16
baseline_cases_by_pop <- 0.0004
min_baseline_cases <- 10
ac_cases_cutoff <- ac_cases_zip %>%
filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
filter(cases_by_pop >= baseline_cases_by_pop) %>%
mutate(log_cases_by_pop = log(cases_by_pop))
zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
# get the change in cases and total visit hours in the time period of interest
later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visit_hours_per_capita_v2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
for (i in 1:length(zipcodes_in_cutoff)) {
curr_zip <- zipcodes_in_cutoff[i]
curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
# first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well
change_cases_curr = NA
change_log_cases_curr = NA
total_visits_curr = NA
total_visits_per_capita_curr = NA
total_visit_hours_per_capita_curr = NA
total_weighted_visit_hours_per_capita_curr = NA
total_weighted_visits_per_capita_curr = NA
total_weighted_visit_hours_per_capita_v2_curr = NA
if ((max(curr_vals$date) >= curr_vals$date[1] + case_weeks_past*7) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + case_weeks_past*7 - visits_lag) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - visits_lag)) {
# change in cases an amount of time later
change_cases_curr = curr_vals$cases_by_pop[case_weeks_past*7 + 1] - curr_vals$cases_by_pop[1]
change_log_cases_curr = curr_vals$log_cases_by_pop[case_weeks_past*7 + 1] - curr_vals$log_cases_by_pop[1]
# get the total visits in the time range of interest
visits_curr = ac_weighted_visits_zip %>%
filter(zipcode == curr_zip) %>%
filter(date >= (curr_vals$date[1] - visits_lag) & date < (curr_vals$date[case_weeks_past*7 + 1] - visits_lag)) %>%
summarize(total_visits_per_capita = sum(visits_per_capita),
total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
total_visit_hours_per_capita = sum(visit_hours_per_capita),
total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
total_visits = sum(total_visits))
total_visits_curr <- visits_curr$total_visits
total_visits_per_capita_curr = visits_curr$total_visits_per_capita
total_visit_hours_per_capita_curr = visits_curr$total_visit_hours_per_capita
total_weighted_visit_hours_per_capita_curr = visits_curr$total_weighted_visit_hours_per_capita
total_weighted_visits_per_capita_curr = visits_curr$total_weighted_visits_per_capita
total_weighted_visit_hours_per_capita_v2_curr = visits_curr$total_weighted_visit_hours_per_capita_v2
}
later_cases_change[i] = change_cases_curr
later_log_cases_change[i] = change_log_cases_curr
total_visits[i] = total_visits_curr
total_visits_per_capita[i] = total_visits_per_capita_curr
total_visit_hours_per_capita[i] = total_visit_hours_per_capita_curr
total_weighted_visits_per_capita[i] = total_weighted_visits_per_capita_curr
total_weighted_visit_hours_per_capita[i] = total_weighted_visit_hours_per_capita_curr
total_weighted_visit_hours_per_capita_v2[i] = total_weighted_visit_hours_per_capita_v2_curr
}
# combine
ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, total_visits_per_capita, total_visit_hours_per_capita, total_weighted_visits_per_capita, total_weighted_visit_hours_per_capita, total_weighted_visit_hours_per_capita_v2, stringsAsFactors = F) %>%
filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
rename(zipcode = zipcodes_in_cutoff) %>%
left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip))
# get linear model values
ac_visits_cases_change_model <- lm(later_cases_change ~ total_visits_per_capita, ac_data_change)
ac_wt_visits_cases_change_model <- lm(later_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
ac_visit_hours_cases_change_model <- lm(later_cases_change ~ total_visit_hours_per_capita, ac_data_change)
ac_wt_visit_hours_cases_change_model <- lm(later_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
ac_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_visits_per_capita, ac_data_change)
ac_wt_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
ac_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_visit_hours_per_capita, ac_data_change)
ac_wt_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.866e-04 -4.877e-04 -6.695e-05 1.951e-04 2.348e-03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.693e-04 8.482e-04 -0.553 0.584
## total_visits_per_capita 6.981e-05 4.660e-05 1.498 0.143
##
## Residual standard error: 0.0006874 on 34 degrees of freedom
## Multiple R-squared: 0.06192, Adjusted R-squared: 0.03433
## F-statistic: 2.244 on 1 and 34 DF, p-value: 0.1433
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_wt_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_weighted_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.406e-04 -5.117e-04 -9.538e-05 3.163e-04 2.164e-03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0001264 0.0004307 -0.293 0.7710
## total_weighted_visits_per_capita 0.0079184 0.0035984 2.201 0.0347 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000664 on 34 degrees of freedom
## Multiple R-squared: 0.1247, Adjusted R-squared: 0.09893
## F-statistic: 4.842 on 1 and 34 DF, p-value: 0.03466
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_visit_hours_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0008171 -0.0004473 -0.0001271 0.0001614 0.0023696
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.220e-04 8.208e-04 -0.392 0.697
## total_visit_hours_per_capita 5.596e-05 4.091e-05 1.368 0.180
##
## Residual standard error: 0.000691 on 34 degrees of freedom
## Multiple R-squared: 0.05216, Adjusted R-squared: 0.02428
## F-statistic: 1.871 on 1 and 34 DF, p-value: 0.1803
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_wt_visit_hours_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_weighted_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0007129 -0.0004636 -0.0001606 0.0001911 0.0022268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0007634 0.0002335 3.270 0.00247 **
## total_weighted_visit_hours_per_capita 0.0001481 0.0011325 0.131 0.89675
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0007095 on 34 degrees of freedom
## Multiple R-squared: 0.0005025, Adjusted R-squared: -0.02889
## F-statistic: 0.01709 on 1 and 34 DF, p-value: 0.8967
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visits_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86387 -0.31930 0.01055 0.17622 1.27021
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.35820 0.58773 -0.609 0.5463
## total_visits_per_capita 0.06890 0.03229 2.134 0.0402 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4763 on 34 degrees of freedom
## Multiple R-squared: 0.1181, Adjusted R-squared: 0.09216
## F-statistic: 4.553 on 1 and 34 DF, p-value: 0.04015
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_wt_visits_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68028 -0.40716 -0.00591 0.26808 1.10176
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1722 0.3038 0.567 0.5746
## total_weighted_visits_per_capita 6.1566 2.5378 2.426 0.0207 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4683 on 34 degrees of freedom
## Multiple R-squared: 0.1476, Adjusted R-squared: 0.1225
## F-statistic: 5.885 on 1 and 34 DF, p-value: 0.02072
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visit_hours_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78463 -0.39103 -0.01861 0.27826 1.28838
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.18531 0.57335 -0.323 0.7485
## total_visit_hours_per_capita 0.05385 0.02858 1.884 0.0681 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4826 on 34 degrees of freedom
## Multiple R-squared: 0.09456, Adjusted R-squared: 0.06793
## F-statistic: 3.551 on 1 and 34 DF, p-value: 0.06809
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_wt_visit_hours_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67646 -0.35578 -0.08208 0.22805 1.15017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.86776 0.16687 5.200 9.44e-06 ***
## total_weighted_visit_hours_per_capita 0.09365 0.80941 0.116 0.909
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5071 on 34 degrees of freedom
## Multiple R-squared: 0.0003935, Adjusted R-squared: -0.02901
## F-statistic: 0.01339 on 1 and 34 DF, p-value: 0.9086
Now weighted visits do better, but visit hours do worse. Hmm.
Try 10 as baseline, 1 week past, 14 day lag.
case_weeks_past <- 1
visits_lag <- 14
baseline_cases <- 10
min_baseline_cases <- 10
ac_cases_cutoff <- ac_cases_zip %>%
filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
filter(cases >= baseline_cases) %>%
mutate(log_cases_by_pop = log(cases_by_pop))
zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
# get the change in cases and total visit hours in the time period of interest
later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visit_hours_per_capita_v2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
for (i in 1:length(zipcodes_in_cutoff)) {
curr_zip <- zipcodes_in_cutoff[i]
curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
# first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well
change_cases_curr = NA
change_log_cases_curr = NA
total_visits_curr = NA
total_visits_per_capita_curr = NA
total_visit_hours_per_capita_curr = NA
total_weighted_visit_hours_per_capita_curr = NA
total_weighted_visits_per_capita_curr = NA
total_weighted_visit_hours_per_capita_v2_curr = NA
if ((max(curr_vals$date) >= curr_vals$date[1] + case_weeks_past*7) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + case_weeks_past*7 - visits_lag) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - visits_lag)) {
# change in cases an amount of time later
change_cases_curr = curr_vals$cases_by_pop[case_weeks_past*7 + 1] - curr_vals$cases_by_pop[1]
change_log_cases_curr = curr_vals$log_cases_by_pop[case_weeks_past*7 + 1] - curr_vals$log_cases_by_pop[1]
# get the total visits in the time range of interest
visits_curr = ac_weighted_visits_zip %>%
filter(zipcode == curr_zip) %>%
filter(date >= (curr_vals$date[1] - visits_lag) & date < (curr_vals$date[case_weeks_past*7 + 1] - visits_lag)) %>%
summarize(total_visits_per_capita = sum(visits_per_capita),
total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
total_visit_hours_per_capita = sum(visit_hours_per_capita),
total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
total_visits = sum(total_visits))
total_visits_curr <- visits_curr$total_visits
total_visits_per_capita_curr = visits_curr$total_visits_per_capita
total_visit_hours_per_capita_curr = visits_curr$total_visit_hours_per_capita
total_weighted_visit_hours_per_capita_curr = visits_curr$total_weighted_visit_hours_per_capita
total_weighted_visits_per_capita_curr = visits_curr$total_weighted_visits_per_capita
total_weighted_visit_hours_per_capita_v2_curr = visits_curr$total_weighted_visit_hours_per_capita_v2
}
later_cases_change[i] = change_cases_curr
later_log_cases_change[i] = change_log_cases_curr
total_visits[i] = total_visits_curr
total_visits_per_capita[i] = total_visits_per_capita_curr
total_visit_hours_per_capita[i] = total_visit_hours_per_capita_curr
total_weighted_visits_per_capita[i] = total_weighted_visits_per_capita_curr
total_weighted_visit_hours_per_capita[i] = total_weighted_visit_hours_per_capita_curr
total_weighted_visit_hours_per_capita_v2[i] = total_weighted_visit_hours_per_capita_v2_curr
}
# combine
ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, total_visits_per_capita, total_visit_hours_per_capita, total_weighted_visits_per_capita, total_weighted_visit_hours_per_capita, total_weighted_visit_hours_per_capita_v2, stringsAsFactors = F) %>%
filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
rename(zipcode = zipcodes_in_cutoff) %>%
left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip))
# get linear model values
ac_visits_cases_change_model <- lm(later_cases_change ~ total_visits_per_capita, ac_data_change)
ac_wt_visits_cases_change_model <- lm(later_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
ac_visit_hours_cases_change_model <- lm(later_cases_change ~ total_visit_hours_per_capita, ac_data_change)
ac_wt_visit_hours_cases_change_model <- lm(later_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
ac_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_visits_per_capita, ac_data_change)
ac_wt_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
ac_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_visit_hours_per_capita, ac_data_change)
ac_wt_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.658e-04 -7.291e-05 -2.233e-05 3.454e-05 3.710e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.837e-06 6.675e-05 -0.087 0.9308
## total_visits_per_capita 3.465e-05 1.334e-05 2.598 0.0132 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0001172 on 39 degrees of freedom
## Multiple R-squared: 0.1475, Adjusted R-squared: 0.1257
## F-statistic: 6.749 on 1 and 39 DF, p-value: 0.01316
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_wt_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_weighted_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.624e-04 -9.873e-05 -4.120e-06 4.228e-05 3.867e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.559e-04 6.019e-05 2.590 0.0134 *
## total_weighted_visits_per_capita 1.706e-04 1.913e-03 0.089 0.9294
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0001269 on 39 degrees of freedom
## Multiple R-squared: 0.000204, Adjusted R-squared: -0.02543
## F-statistic: 0.007958 on 1 and 39 DF, p-value: 0.9294
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_visit_hours_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.905e-04 -8.660e-05 -2.140e-06 4.659e-05 3.676e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.698e-06 8.402e-05 -0.115 0.9087
## total_visit_hours_per_capita 3.193e-05 1.532e-05 2.084 0.0438 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0001204 on 39 degrees of freedom
## Multiple R-squared: 0.1002, Adjusted R-squared: 0.07711
## F-statistic: 4.342 on 1 and 39 DF, p-value: 0.04378
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_wt_visit_hours_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_weighted_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.686e-04 -7.892e-05 -1.329e-05 5.063e-05 3.795e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.755e-04 2.954e-05 5.940 6.27e-07
## total_weighted_visit_hours_per_capita -2.987e-04 4.520e-04 -0.661 0.513
##
## (Intercept) ***
## total_weighted_visit_hours_per_capita
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0001262 on 39 degrees of freedom
## Multiple R-squared: 0.01108, Adjusted R-squared: -0.01428
## F-statistic: 0.4368 on 1 and 39 DF, p-value: 0.5125
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visits_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45833 -0.14879 -0.01571 0.12872 0.60262
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2523 0.1431 -1.763 0.0857 .
## total_visits_per_capita 0.1435 0.0286 5.017 1.19e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2512 on 39 degrees of freedom
## Multiple R-squared: 0.3922, Adjusted R-squared: 0.3766
## F-statistic: 25.17 on 1 and 39 DF, p-value: 1.188e-05
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_wt_visits_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44940 -0.24265 -0.08562 0.14159 0.64862
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4000 0.1527 2.620 0.0125 *
## total_weighted_visits_per_capita 1.2839 4.8530 0.265 0.7927
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3219 on 39 degrees of freedom
## Multiple R-squared: 0.001791, Adjusted R-squared: -0.0238
## F-statistic: 0.06999 on 1 and 39 DF, p-value: 0.7927
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visit_hours_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52819 -0.18997 -0.05996 0.14897 0.60311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10956 0.20611 -0.532 0.59806
## total_visit_hours_per_capita 0.10248 0.03759 2.727 0.00954 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2953 on 39 degrees of freedom
## Multiple R-squared: 0.1601, Adjusted R-squared: 0.1386
## F-statistic: 7.434 on 1 and 39 DF, p-value: 0.00954
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_wt_visit_hours_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.47149 -0.20620 -0.07053 0.13658 0.63519
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.50176 0.07418 6.764 4.52e-08 ***
## total_weighted_visit_hours_per_capita -1.30628 1.13489 -1.151 0.257
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3169 on 39 degrees of freedom
## Multiple R-squared: 0.03285, Adjusted R-squared: 0.008056
## F-statistic: 1.325 on 1 and 39 DF, p-value: 0.2567
Super weird. Try again with 4 weeks past.
case_weeks_past <- 4
visits_lag <- 14
baseline_cases <- 10
min_baseline_cases <- 10
ac_cases_cutoff <- ac_cases_zip %>%
filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
filter(cases >= baseline_cases) %>%
mutate(log_cases_by_pop = log(cases_by_pop))
zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
# get the change in cases and total visit hours in the time period of interest
later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_weighted_visit_hours_per_capita_v2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
for (i in 1:length(zipcodes_in_cutoff)) {
curr_zip <- zipcodes_in_cutoff[i]
curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
# first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well
change_cases_curr = NA
change_log_cases_curr = NA
total_visits_curr = NA
total_visits_per_capita_curr = NA
total_visit_hours_per_capita_curr = NA
total_weighted_visit_hours_per_capita_curr = NA
total_weighted_visits_per_capita_curr = NA
total_weighted_visit_hours_per_capita_v2_curr = NA
if ((max(curr_vals$date) >= curr_vals$date[1] + case_weeks_past*7) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + case_weeks_past*7 - visits_lag) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - visits_lag)) {
# change in cases an amount of time later
change_cases_curr = curr_vals$cases_by_pop[case_weeks_past*7 + 1] - curr_vals$cases_by_pop[1]
change_log_cases_curr = curr_vals$log_cases_by_pop[case_weeks_past*7 + 1] - curr_vals$log_cases_by_pop[1]
# get the total visits in the time range of interest
visits_curr = ac_weighted_visits_zip %>%
filter(zipcode == curr_zip) %>%
filter(date >= (curr_vals$date[1] - visits_lag) & date < (curr_vals$date[case_weeks_past*7 + 1] - visits_lag)) %>%
summarize(total_visits_per_capita = sum(visits_per_capita),
total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
total_visit_hours_per_capita = sum(visit_hours_per_capita),
total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
total_visits = sum(total_visits))
total_visits_curr <- visits_curr$total_visits
total_visits_per_capita_curr = visits_curr$total_visits_per_capita
total_visit_hours_per_capita_curr = visits_curr$total_visit_hours_per_capita
total_weighted_visit_hours_per_capita_curr = visits_curr$total_weighted_visit_hours_per_capita
total_weighted_visits_per_capita_curr = visits_curr$total_weighted_visits_per_capita
total_weighted_visit_hours_per_capita_v2_curr = visits_curr$total_weighted_visit_hours_per_capita_v2
}
later_cases_change[i] = change_cases_curr
later_log_cases_change[i] = change_log_cases_curr
total_visits[i] = total_visits_curr
total_visits_per_capita[i] = total_visits_per_capita_curr
total_visit_hours_per_capita[i] = total_visit_hours_per_capita_curr
total_weighted_visits_per_capita[i] = total_weighted_visits_per_capita_curr
total_weighted_visit_hours_per_capita[i] = total_weighted_visit_hours_per_capita_curr
total_weighted_visit_hours_per_capita_v2[i] = total_weighted_visit_hours_per_capita_v2_curr
}
# combine
ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, total_visits_per_capita, total_visit_hours_per_capita, total_weighted_visits_per_capita, total_weighted_visit_hours_per_capita, total_weighted_visit_hours_per_capita_v2, stringsAsFactors = F) %>%
filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
rename(zipcode = zipcodes_in_cutoff) %>%
left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip))
# get linear model values
ac_visits_cases_change_model <- lm(later_cases_change ~ total_visits_per_capita, ac_data_change)
ac_wt_visits_cases_change_model <- lm(later_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
ac_visit_hours_cases_change_model <- lm(later_cases_change ~ total_visit_hours_per_capita, ac_data_change)
ac_wt_visit_hours_cases_change_model <- lm(later_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
ac_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_visits_per_capita, ac_data_change)
ac_wt_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
ac_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_visit_hours_per_capita, ac_data_change)
ac_wt_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0007086 -0.0003710 -0.0001228 0.0003137 0.0024059
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.333e-04 5.859e-04 -0.569 0.5730
## total_visits_per_capita 5.941e-05 3.226e-05 1.842 0.0738 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0005926 on 36 degrees of freedom
## Multiple R-squared: 0.08611, Adjusted R-squared: 0.06073
## F-statistic: 3.392 on 1 and 36 DF, p-value: 0.07376
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_wt_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_weighted_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0007431 -0.0004466 -0.0002294 0.0003675 0.0022658
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0002792 0.0003966 0.704 0.486
## total_weighted_visits_per_capita 0.0039268 0.0033373 1.177 0.247
##
## Residual standard error: 0.0006083 on 36 degrees of freedom
## Multiple R-squared: 0.03703, Adjusted R-squared: 0.01028
## F-statistic: 1.384 on 1 and 36 DF, p-value: 0.2471
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_visit_hours_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0006018 -0.0004484 -0.0001387 0.0003377 0.0024175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.435e-04 7.253e-04 -0.198 0.844
## total_visit_hours_per_capita 4.359e-05 3.580e-05 1.217 0.231
##
## Residual standard error: 0.0006075 on 36 degrees of freedom
## Multiple R-squared: 0.03954, Adjusted R-squared: 0.01286
## F-statistic: 1.482 on 1 and 36 DF, p-value: 0.2314
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
summary(ac_wt_visit_hours_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ total_weighted_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0006331 -0.0004378 -0.0002010 0.0003476 0.0022717
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0008356 0.0002156 3.876 0.000433
## total_weighted_visit_hours_per_capita -0.0005782 0.0010575 -0.547 0.587950
##
## (Intercept) ***
## total_weighted_visit_hours_per_capita
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0006174 on 36 degrees of freedom
## Multiple R-squared: 0.008234, Adjusted R-squared: -0.01931
## F-statistic: 0.2989 on 1 and 36 DF, p-value: 0.588
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visits_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9609 -0.2659 -0.0020 0.2350 1.1300
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.02253 0.48775 -2.096 0.0431 *
## total_visits_per_capita 0.12152 0.02685 4.526 6.34e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4933 on 36 degrees of freedom
## Multiple R-squared: 0.3626, Adjusted R-squared: 0.3449
## F-statistic: 20.48 on 1 and 36 DF, p-value: 6.339e-05
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_wt_visits_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9683 -0.4826 -0.1709 0.5479 1.4410
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.8437 0.3993 2.113 0.0416 *
## total_weighted_visits_per_capita 2.7038 3.3599 0.805 0.4263
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6124 on 36 degrees of freedom
## Multiple R-squared: 0.01767, Adjusted R-squared: -0.009617
## F-statistic: 0.6476 on 1 and 36 DF, p-value: 0.4263
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visit_hours_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8364 -0.3940 -0.1088 0.3823 1.1685
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.66004 0.67153 -0.983 0.33222
## total_visit_hours_per_capita 0.09044 0.03315 2.728 0.00979 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5625 on 36 degrees of freedom
## Multiple R-squared: 0.1713, Adjusted R-squared: 0.1483
## F-statistic: 7.442 on 1 and 36 DF, p-value: 0.009789
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_log_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_wt_visit_hours_cases_change_log_model)
##
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visit_hours_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9984 -0.5318 -0.1327 0.4514 1.3733
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.3629 0.2122 6.422 1.9e-07 ***
## total_weighted_visit_hours_per_capita -1.1521 1.0409 -1.107 0.276
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6077 on 36 degrees of freedom
## Multiple R-squared: 0.03291, Adjusted R-squared: 0.006043
## F-statistic: 1.225 on 1 and 36 DF, p-value: 0.2757